import numpy as np
import pandas as pd
import feather, pickle
import os, subprocess
from tqdm import tqdm
from itertools import combinations

# Read the feature table from disk and keep only the reduced set of feature columns
breath_features = feather.read_dataframe('breath_features.feather')
columns_reduced = [
    'alpha_0',
    'ts__absolute_sum_of_changes',
    'ts__ar_coefficient__coeff_3__k_10',
    'ts__number_peaks__n_1',
    'ts__number_cwt_peaks__n_1',
    'ts__partial_autocorrelation__lag_3',
    'ts__ar_coefficient__coeff_4__k_10',
    'omega',
    'ts__number_cwt_peaks__n_5',
    'ts__kurtosis',
]
breath_features = breath_features[columns_reduced]

# User ids are 0 .. num_users-1; user_pairs holds every unordered pair of distinct ids
num_users = 94
users = range(num_users)
user_pairs = list(combinations(users, 2))

def clear_terminal():
    """Clear the console by running the platform's clear-screen command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise.
    """
    if os.name == 'nt':
        command = 'cls'
    else:
        command = 'clear'
    os.system(command)

def shuffle_split_train_test(trial=None, num_users=94, users=None, samples=190):
    """Block-shuffle every user's samples and split them 60/40 into train and test sets.

    For each user, the `samples` consecutive rows of the module-level
    ``breath_features`` frame that belong to that user are cut into 10 equal
    contiguous blocks. The blocks are shuffled (rows inside a block keep their
    order), the first 60% of the shuffled rows become the user's training set and
    the remaining rows the test set. Both arrays are saved as
    ``train_dataset.npy`` and ``test_dataset.npy`` inside ``train_test_{trial}``.

    Parameters:
        trial: Trial identifier; used in the output directory name and the
            progress-bar label.
        num_users: Size of the first axis of the returned arrays. Every id in
            `users` must be smaller than this.
        users: Iterable of integer user ids to process. Defaults to
            ``range(num_users)`` when None. Users that are not listed keep
            all-zero rows in the returned arrays.
        samples: Number of consecutive rows per user in ``breath_features``.
            Must be divisible by 10.

    Returns:
        ``(train_dataset, test_dataset, directory)`` where the datasets have shape
        ``(num_users, n_rows, n_features)`` and `directory` is the name of the
        directory the ``.npy`` files were written to.

    Raises:
        ValueError: If `samples` cannot be cut into 10 equal blocks.
    """
    n_blocks = 10  # each user's rows are shuffled as 10 contiguous blocks
    if samples % n_blocks != 0:
        raise ValueError(
            f'samples ({samples}) must be divisible by {n_blocks} to form equal blocks')
    block_size = samples // n_blocks
    n_train = int(0.6 * samples)  # rows per user that go to the training set

    if users is None:
        users = range(num_users)

    directory = f'train_test_{trial}'
    # Create the output directory if it doesn't exist
    os.makedirs(directory, exist_ok=True)

    n_features = breath_features.shape[1]
    train_dataset = np.zeros((num_users, n_train, n_features))
    test_dataset = np.zeros((num_users, samples - n_train, n_features))

    pbar_users = tqdm(users, unit='users', desc=f'Split train test (Trial {trial})',
                      ncols=100, position=0, leave=False)
    for user in pbar_users:
        # Rows of this user: `samples` consecutive rows starting at user * samples
        first_row = user * samples
        data = breath_features.iloc[first_row:first_row + samples]

        # Work on an explicit copy so the in-place shuffle below can never
        # reorder the shared feature table that later trials read again
        data_3d = data.to_numpy(copy=True).reshape((n_blocks, block_size, n_features))

        # Shuffle the blocks (np.random.shuffle permutes along the first axis only)
        np.random.shuffle(data_3d)

        # Flatten the shuffled blocks back into a (samples, n_features) table
        data_shuffled = data_3d.reshape((samples, n_features))

        # First 60% of the shuffled rows for training, the rest for testing
        train_dataset[user] = data_shuffled[:n_train]
        test_dataset[user] = data_shuffled[n_train:]

    # Write train and test datasets
    with open(os.path.join(directory, 'train_dataset.npy'), 'wb') as f:
        np.save(f, train_dataset)
    with open(os.path.join(directory, 'test_dataset.npy'), 'wb') as f:
        np.save(f, test_dataset)

    return train_dataset, test_dataset, directory

def build_models(n_partitions=12, trial=None):
    """Score all user pairs in parallel worker processes and save the combined scores.

    The module-level ``user_pairs`` list is cut into at most `n_partitions`
    contiguous batches. Each batch is saved to
    ``model_building_partition_files//partition_{i}.npy`` and handed to a
    ``python model_building_partition.py <file> <i>`` subprocess. After a worker
    exits, ``score_{i}.npy`` is loaded from the same directory (the worker is
    expected to have written it). The score arrays are concatenated in batch order
    and saved to ``model_scores_{trial}.npy``. Finally every file in the temporary
    directory is deleted.

    Parameters:
        n_partitions: Maximum number of batches, and therefore of worker processes.
        trial: Trial identifier; used in the output file name and the
            progress-bar label.

    Raises:
        RuntimeError: If a worker process exits with a non-zero status.
    """
    # Create a temporary directory if it doesn't exist
    temp_directory = 'model_building_partition_files'
    os.makedirs(temp_directory, exist_ok=True)

    # Split the user pairs into batches. Ceiling division keeps the number of
    # batches at or below n_partitions (floor division spilled the remainder into
    # an extra batch), and max(1, ...) avoids a zero step when there are fewer
    # pairs than partitions.
    batch_size = max(1, -(-len(user_pairs) // n_partitions))
    batches = [user_pairs[i:i+batch_size] for i in range(0, len(user_pairs), batch_size)]

    # Spawn one subprocess per batch of user pairs
    processes = []
    for i, batch in enumerate(batches):
        # The path string is passed to the worker verbatim, so its form is kept as-is
        fname = f"{temp_directory}//partition_{i}.npy"
        np.save(fname, batch)
        p = subprocess.Popen(["python", "model_building_partition.py", fname, str(i)])
        processes.append(p)

    # Start from an empty float array so the result dtype follows the same
    # promotion as concatenating onto np.array([]) one chunk at a time
    score_chunks = [np.array([])]
    pbar_processes = tqdm(processes, unit='partition', desc=f'Building models (Trial {trial})',
                          ncols=100, position=0, leave=False)
    for i, p in enumerate(pbar_processes):
        # Wait for this worker and refuse to read its output if it failed;
        # otherwise a stale or missing score file would go unnoticed
        return_code = p.wait()
        if return_code != 0:
            raise RuntimeError(
                f"model_building_partition.py failed for partition {i} "
                f"(exit code {return_code})")
        score_chunks.append(np.load(f"{temp_directory}//score_{i}.npy"))

    # Join all partitions once instead of re-copying the array per partition
    model_scores = np.concatenate(score_chunks)
    fname = f"model_scores_{trial}.npy"
    np.save(fname, model_scores)

    # Delete all temporary files in the temporary directory
    for leftover in os.listdir(temp_directory):
        os.remove(f'{temp_directory}//{leftover}')

def user_identification(n_partitions=12, trial=None):
    """Run user identification in parallel worker processes and save the combined result.

    The module-level ``users`` sequence is cut into at most `n_partitions`
    contiguous batches. Each batch is saved to
    ``uis_partition_files//partition_{i}.npy`` and handed to a
    ``python user_identification_partition.py <file> <i>`` subprocess. After a
    worker exits, ``result_{pid}.feather`` (named after the worker's process id)
    is read from the same directory. The chunks are stacked in batch order,
    missing values are filled with 0, everything is cast to int, and the table is
    written to ``identification_matrix_{trial}.feather``. Finally the ``.npy`` and
    ``.feather`` files in the temporary directory are deleted.

    Parameters:
        n_partitions: Maximum number of batches, and therefore of worker processes.
        trial: Trial identifier; used in the output file name and the
            progress-bar label.

    Raises:
        RuntimeError: If a worker process exits with a non-zero status.
    """
    # Create a temporary directory if it doesn't exist
    temp_directory = 'uis_partition_files'
    os.makedirs(temp_directory, exist_ok=True)

    # Split the users into batches. Ceiling division keeps the number of batches
    # at or below n_partitions (floor division spilled the remainder into extra
    # batches), and max(1, ...) avoids a zero step when there are fewer users
    # than partitions.
    batch_size = max(1, -(-len(users) // n_partitions))
    batches = [users[i:i+batch_size] for i in range(0, len(users), batch_size)]

    # Spawn one subprocess per batch of users
    processes = []
    for i, batch in enumerate(batches):
        # The path string is passed to the worker verbatim, so its form is kept as-is
        fname = f"{temp_directory}//partition_{i}.npy"
        np.save(fname, batch)
        p = subprocess.Popen(["python", "user_identification_partition.py", fname, str(i)])
        processes.append(p)

    # Chunk columns presumably come back from feather as strings; map them to the
    # integer user ids. Built once from `users` rather than from the growing result.
    column_map = {str(user): int(user) for user in users}

    result = pd.DataFrame(columns=[user for user in users])
    result_chunks = []
    pbar_processes = tqdm(processes, unit='partition', desc=f'Identification (Trial {trial})',
                          ncols=100, position=0, leave=False)
    for p in pbar_processes:
        # Wait for this worker and refuse to read its output if it failed
        return_code = p.wait()
        if return_code != 0:
            raise RuntimeError(
                f"user_identification_partition.py failed (pid {p.pid}, "
                f"exit code {return_code})")
        # NOTE(review): assumes the worker names its output after its own pid and
        # that this equals Popen.pid (no launcher/shim in between) - confirm
        with open(os.path.join(temp_directory, f"result_{p.pid}.feather"), "rb") as f:
            result_chunks.append(feather.read_dataframe(f).rename(columns=column_map))

    if result_chunks:
        # Combine the results from all subprocesses in one concat; columns a
        # chunk lacks become NaN and are turned into 0 before the integer cast
        result = pd.concat([result, *result_chunks]).reset_index(drop=True)
        result = result.fillna(0).astype(int)

    feather.write_dataframe(result, f'identification_matrix_{trial}.feather')

    # Delete the temporary partition and result files
    for leftover in os.listdir(temp_directory):
        if leftover.endswith(('.npy', '.feather')):
            os.remove(f'{temp_directory}//{leftover}')

# Trial schedule: num_trials consecutive trial ids beginning at start
num_trials = 66
start = 0
trials = np.arange(start, start + num_trials)


def _dump_pickle(path, obj):
    """Pickle `obj` into the file at `path`, overwriting any existing file."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


for trial in trials:
    clear_terminal()
    # Record the current trial id on disk
    _dump_pickle("trial.pickle", trial)

    # Produce this trial's shuffled train/test split
    train_dataset, test_dataset, data_directory = shuffle_split_train_test(
        trial=trial, users=users, samples=190)

    # Record where this trial's datasets were written
    _dump_pickle("data_directory.pickle", data_directory)

    # Make sure the per-trial model directory exists, then record its name
    model_directory = f'models_{trial}'
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    _dump_pickle("model_directory.pickle", model_directory)
    clear_terminal()

    # Stage 1: build and score the models
    build_models(n_partitions=12, trial=trial)
    clear_terminal()

    # Stage 2: run user identification
    user_identification(n_partitions=12, trial=trial)
    clear_terminal()
